This file is used to prepare the figures for the paper.
library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
library(org.Mm.eg.db)
.libPaths()
## [1] "/usr/local/lib/R/library"
Here are the folders where analyzes are stored :
data_dir = "./.."
list.files(data_dir)
## [1] "0_intro" "1_metadata" "2_individual" "3_combined" "4_zoom"
## [6] "5_wu" "6_figures" "LICENSE" "index.html" "index_layout"
We load the dataset containing all cells :
sobj = readRDS(paste0(data_dir, "/3_combined/hs_hd_sobj.rds"))
sobj
## An object of class Seurat
## 20003 features across 12111 samples within 1 assay
## Active assay: RNA (20003 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_38_tsne, RNA_pca_38_umap, harmony, harmony_38_umap, harmony_38_tsne
These are all the samples analyzed :
sample_info = readRDS(paste0(data_dir, "/1_metadata/hs_hd_sample_info.rds"))
# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
as.data.frame.table(., stringsAsFactors = FALSE) %>%
`colnames<-`(c("sample_identifier", "nb_cells")) %>%
dplyr::left_join(x = ., y = sample_info, by = "sample_identifier")
# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))
These are the custom colors for cell populations :
color_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_color_markers.rds"))
color_markers = color_markers[names(color_markers) != "melanocytes"]
ors_color = color_markers["ORS"]
color_markers["ORS"] = color_markers["IFE"]
color_markers["IFE"] = ors_color
color_markers["B cells"] = "chocolate3"
rm(ors_color)
# re-order
color_markers = color_markers[c("CD4 T cells", "CD8 T cells", "Langerhans cells", "macrophages", "B cells",
"cuticle", "cortex", "medulla", "IRS", "proliferative",
"HFSC", "ORS", "IBL", "IFE", "sebocytes")]
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(hjust = 1, angle = 20))
We define custom colors for sample type :
sample_type_colors = setNames(nm = levels(sample_info$sample_type),
c("#C55F40", "#2C78E6"))
data.frame(cell_type = names(sample_type_colors),
color = unlist(sample_type_colors)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(sample_type_colors), breaks = names(sample_type_colors)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We set a background color :
bg_color = "gray94"
This is the correspondence between cell types and cell families, and custom colors to color cells by cell category :
custom_order_cell_type = data.frame(
cell_type = names(color_markers),
cell_category = c(rep("immune cells", 5),
rep("matrix", 5),
rep("non matrix", 5)),
stringsAsFactors = FALSE)
custom_order_cell_type$cell_type = factor(custom_order_cell_type$cell_type,
levels = custom_order_cell_type$cell_type)
rownames(custom_order_cell_type) = custom_order_cell_type$cell_type
category_color = c("immune cells" = "slateblue1",
"matrix" = "mediumseagreen",
"non matrix" = "firebrick3")
We load markers to display on a dotplot to assess cell type annotation :
dotplot_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = dotplot_markers[names(dotplot_markers) != "melanocytes"]
lengths(dotplot_markers)
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 2 2 2 2
## B cells cuticle cortex medulla
## 2 2 2 2
## IRS proliferative IBL ORS
## 2 2 2 2
## IFE HFSC sebocytes
## 2 2 2
Custom functions to display gene expression on the heatmap :
color_fun = function(one_gene) {
gene_range = range(ht_annot[, one_gene])
gene_palette = circlize::colorRamp2(colors = c("#FFFFFF", aquarius::color_gene[-1]),
breaks = seq(from = gene_range[1], to = gene_range[2],
length.out = length(aquarius::color_gene)))
return(gene_palette)
}
This is the projection name to visualize cells :
name2D = "harmony_38_tsne"
name2D_atlas = name2D
We make a low resolutive clustering for the heatmap :
sobj = Seurat::FindClusters(sobj, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12111
## Number of edges: 475472
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9421
## Number of communities: 17
## Elapsed time: 1 seconds
length(levels(sobj$seurat_clusters))
## [1] 17
We define cluster type and cluster category :
sobj$cell_type = sobj$cell_type %>%
as.character() %>%
factor(., levels = names(color_markers))
cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(sobj$cell_type)[cluster_type])
sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
levels = levels(sobj$cell_type))
sobj$cluster_category = custom_order_cell_type[sobj$cluster_type, "cell_category"]
sobj$cluster_category = factor(sobj$cluster_category,
levels = names(category_color))
Project name :
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))
# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
as.data.frame() %>%
`colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj$project_name
cells_coord = cells_coord[(rnd_order), ]
# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
ggplot2::geom_point(size = 0.5) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
ggplot2::theme_void() +
ggplot2::theme(aspect.ratio = 1,
legend.position = "none")
Sample type :
# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
as.data.frame() %>%
`colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_type = sobj$sample_type
cells_coord = cells_coord[(rnd_order), ]
# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_type)) +
ggplot2::geom_point(size = 0.5) +
ggplot2::scale_color_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
ggplot2::theme_void() +
ggplot2::theme(aspect.ratio = 1,
legend.position = "none")
Cluster :
grey_palette = setNames(nm = levels(sobj$seurat_clusters),
rep("#D9D9D9", length(levels(sobj$seurat_clusters))))
grey_palette[c("7", "16", "1", "12", "11", "10", "15")] = "#BDBDBD"
grey_palette[c("16", "14", "5", "9")] = "#969696"
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.4,
group.by = "seurat_clusters", cols = grey_palette,
label = TRUE, label.size = 6) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Cell type annotation :
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
group.by = "cell_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Cluster category annotation :
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
group.by = "cluster_category", cols = category_color) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Cell type annotation split by condition :
plot_list = aquarius::plot_split_dimred(sobj, reduction = name2D,
group_by = "cell_type",
group_color = color_markers,
split_by = "sample_type",
split_color = sample_type_colors,
bg_color = bg_color)
patchwork::wrap_plots(plot_list) &
Seurat::NoLegend()
Gene expression to assess annotation :
genes = c("PTPRC", "MSX2", "KRT14",
# For supplementary figure
"VIM", "FGF18", "SOX9", "TCF3", "TBX1",
"RUNX1", "NFATC1", "LHX2", "CD34")
plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
gene = genes[[gene_id]]
p = Seurat::FeaturePlot(sobj, features = gene, reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
breaks = c(0:6)) +
ggplot2::labs(title = gene) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, size = 17),
plot.subtitle = element_text(hjust = 0.5, size = 15),
legend.text = element_text(size = 15)) +
Seurat::NoAxes()
return(p)
})
names(plot_list) = genes
plot_list
## $PTPRC
##
## $MSX2
##
## $KRT14
##
## $VIM
##
## $FGF18
##
## $SOX9
##
## $TCF3
##
## $TBX1
##
## $RUNX1
##
## $NFATC1
##
## $LHX2
##
## $CD34
Barplot by cluster category :
quantif = table(sobj$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "nb_cells"))
aquarius::plot_barplot(df = table(sobj$sample_identifier,
sobj$cluster_category) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "Cell Type", "Number")),
x = "Sample", y = "Number", fill = "Cell Type",
position = position_fill()) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
label.size = 0, size = 4) +
ggplot2::scale_fill_manual(values = unlist(category_color),
breaks = names(category_color),
name = "Cell category") +
ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
labels = paste0(seq(0, 100, by = 25), sep = " %"),
expand = ggplot2::expansion(add = c(0, 0.05))) +
ggplot2::theme(axis.title.y = element_blank(),
axis.line.x = element_line(colour = "lightgray"),
text = element_text(size = 15),
axis.text.x = element_text(margin = margin(t = 25, r = 0, b = 0, l = 0)),
legend.position = "none")
Heatmap of cluster proportion by sample :
group_by = "seurat_clusters"
cluster_by_sample = table(sobj$sample_identifier,
sobj@meta.data[, group_by]) %>%
prop.table(margin = 1) %>%
as.matrix()
## Right annotation : number of cells by dataset
ht_annot = table(sobj$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("sample_identifier", "nb_cells")) %>%
`rownames<-`(.$sample_identifier) %>%
dplyr::select(-sample_identifier)
ha_right = ComplexHeatmap::HeatmapAnnotation(
df = ht_annot,
which = "row",
show_legend = TRUE,
annotation_name_side = "top",
col = list(nb_cells = circlize::colorRamp2(colors = RColorBrewer::brewer.pal(name = "Greys", n = 9),
breaks = seq(from = range(ht_annot$nb_cells)[1],
to = range(ht_annot$nb_cells)[2],
length.out = 9))))
## Left annotation : gender
ha_left = ComplexHeatmap::HeatmapAnnotation(
gender = sample_info$gender,
which = "row",
show_legend = TRUE,
annotation_name_side = "top",
col = list(gender = setNames(nm = c("F", "M"),
c("lightcyan3", "navyblue"))))
## Top annotation : main cell type in this cluster
ht_annot = table(sobj$sample_identifier,
sobj@meta.data[, group_by]) %>%
prop.table(margin = 1) %>%
as.matrix()
ht_annot = table(sobj$cell_type,
sobj@meta.data[, group_by]) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
ht_annot = data.frame(row.names = names(ht_annot),
cell_type = names(color_markers)[ht_annot],
stringsAsFactors = FALSE)
ht_annot = dplyr::left_join(ht_annot, custom_order_cell_type, by = "cell_type") %>%
# Simplification for matrix
dplyr::mutate(cell_type = ifelse(cell_type %in% c("medulla", "cortex", "cuticle"), yes = "hair shaft", no = cell_type)) %>%
# Simplification for T cells
dplyr::mutate(cell_type = ifelse(cell_type %in% c("CD4 T cells", "CD8 T cells"), yes = "T cells", no = cell_type)) %>%
# Simplification for APC
dplyr::mutate(cell_type = ifelse(cell_type %in% c("Langerhans cells", "macrophages"), yes = "APC", no = cell_type)) %>%
# Add color
dplyr::mutate(color = as.character(color_markers[cell_type])) %>%
dplyr::mutate(color = ifelse(cell_type == "hair shaft", yes = "#FFB6C1", no = color)) %>%
dplyr::mutate(color = ifelse(cell_type == "T cells", yes = "#8A6EE6", no = color)) %>%
dplyr::mutate(color = ifelse(cell_type == "APC", yes = "#9CAA4B", no = color))
ha_top = ComplexHeatmap::HeatmapAnnotation(
# cell_type = ht_annot$cell_type,
cell_category = ht_annot$cell_category,
which = "column",
show_legend = TRUE,
show_annotation_name = FALSE,
# annotation_name_side = "left",
col = list(#cell_type = setNames(nm = ht_annot$cell_type,
# ht_annot$color),
cell_category = category_color
))
## Assemble heatmap
ht = ComplexHeatmap::Heatmap(cluster_by_sample,
heatmap_legend_param = list(title = "Proportion",
col = c("#2166AC", "#F7F7F7", "#B2182B")),
# bottom_annotation = ha_bottom,
right_annotation = ha_right,
left_annotation = ha_left,
top_annotation = ha_top,
cluster_rows = TRUE,
cluster_columns = TRUE,
row_title = "Sample",
row_names_gp = grid::gpar(names = sample_info$sample_identifier,
col = sample_info$color,
fontface = "bold"),
column_title = "Cluster",
column_names_centered = TRUE,
row_names_side = "left",
column_names_side = "top",
column_names_rot = 0)
## Draw !
ComplexHeatmap::draw(ht, merge_legends = TRUE)
For the dotplot, we clarify clusters and cell type annotation :
cell_type_in_cluster = table(sobj$cell_type, sobj$seurat_clusters) %>%
prop.table(., margin = 1) %>%
apply(., 1, which.max)
cell_type_in_cluster = cell_type_in_cluster - 1
missing_cluster = setdiff(levels(sobj$seurat_clusters),
cell_type_in_cluster)
cell_type_in_cluster = data.frame(cell_type = c(names(cell_type_in_cluster), cluster_type[missing_cluster]),
cluster_id = c(cell_type_in_cluster, names(cluster_type[missing_cluster])),
stringsAsFactors = FALSE, row.names = NULL) %>%
dplyr::mutate(cluster_id = as.numeric(cluster_id)) %>%
dplyr::arrange(cell_type, cluster_id)
custom_order_cell_type$clusters = custom_order_cell_type %>%
apply(., MARGIN = 1, FUN = function(one_row) {
cell_type = one_row["cell_type"]
clusters = cell_type_in_cluster %>%
dplyr::filter(.data$cell_type == .env$cell_type) %>%
dplyr::pull(cluster_id)
cell_type_cluster = paste0(cell_type, " (", paste0(clusters, collapse = ", "), ")")
return(cell_type_cluster)
}) %>%
factor(., levels = .)
custom_order_cell_type
## cell_type cell_category clusters
## CD4 T cells CD4 T cells immune cells CD4 T cells (2)
## CD8 T cells CD8 T cells immune cells CD8 T cells (2, 14)
## Langerhans cells Langerhans cells immune cells Langerhans cells (6)
## macrophages macrophages immune cells macrophages (6)
## B cells B cells immune cells B cells (16)
## cuticle cuticle matrix cuticle (3)
## cortex cortex matrix cortex (3)
## medulla medulla matrix medulla (11)
## IRS IRS matrix IRS (15)
## proliferative proliferative matrix proliferative (4, 9, 10, 13)
## HFSC HFSC non matrix HFSC (7, 8)
## ORS ORS non matrix ORS (0)
## IBL IBL non matrix IBL (1)
## IFE IFE non matrix IFE (5)
## sebocytes sebocytes non matrix sebocytes (12)
Dotplot :
plot_list = aquarius::plot_dotplot(sobj,
markers = c("PTPRC",
"CD3E", "CD4",
"CD3E", "CD8A",
"CD207", "AIF1",
"TREM2", "MSR1",
"CD79A", "CD79B",
# "PRDM1", "KRT85",
"MSX2",
"KRT32", "KRT35",
"KRT31", "PRR9",
"BAMBI", "ALDH1A3",
"KRT71", "KRT73",
"TOP2A", "MCM5",
"KRT14", "CXCL14",
"KRT15", "COL17A1",
"DIO2", "TCEAL2",
"KRT16", "KRT6C",
"SPINK5", "LY6D",
"CLMP", "PPARG"),
assay = "RNA", column_name = "cell_type", nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(legend.position = "left",
legend.justification = "bottom",
legend.box = "vertical",
legend.box.margin = margin(0,70,0,0),
axis.title = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
plot.margin = unit(rep(0, 4), "cm"))
p = ggplot2::ggplot(custom_order_cell_type, aes(x = clusters, y = 0)) +
ggplot2::geom_point(size = 0) +
ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = category_color["immune cells"]) +
ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = category_color["matrix"]) +
ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = category_color["non matrix"]) +
ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
ggplot2::theme_classic() +
ggplot2::theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
axis.line.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
plot.margin = unit(c(0,0.5,0.5,0), "cm"))
plot_list = patchwork::wrap_plots(plot_list, p,
ncol = 1, heights = c(25, 1))
plot_list
We load the immune cells dataset :
sobj_ic = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_sobj.rds"))
sobj_ic
## An object of class Seurat
## 15121 features across 2329 samples within 1 assay
## Active assay: RNA (15121 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne
This is the projection name to visualize cells :
name2D = "harmony_20_tsne"
To represent results from differential expression, we load the analyses results :
list_results = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_list_results.rds"))
lapply(list_results, FUN = names)
## $`Langerhans cells`
## [1] "mark" "gsea"
##
## $macrophages
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
##
## $`CD4 T cells`
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
##
## $`CD8 T cells`
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
We defined cluster type and cluster category :
cluster_type = table(sobj_ic$cell_type, sobj_ic$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(sobj_ic$cell_type)[cluster_type])
sobj_ic$cluster_type = cluster_type[sobj_ic$seurat_clusters]
sobj_ic$cluster_type = factor(sobj_ic$cluster_type,
levels = levels(sobj_ic$cell_type))
Immune cells on the full atlas :
sobj$is_immune = (colnames(sobj) %in% colnames(sobj_ic))
Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
group.by = "is_immune", order = "TRUE") +
ggplot2::scale_color_manual(values = c(category_color[["immune cells"]], bg_color),
breaks = c(TRUE, FALSE)) +
ggplot2::labs(title = "Immune cells",
subtitle = paste0(ncol(sobj_ic), " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes() + Seurat::NoLegend()
Cluster type :
Seurat::DimPlot(sobj_ic, reduction = name2D, pt.size = 1,
group.by = "cluster_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Clustering
Seurat::DimPlot(sobj_ic, reduction = name2D, pt.size = 1,
group.by = "seurat_clusters", label = TRUE) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Table to make violin plot by sample, by sample type, and feature plot split by sample type :
list_clusters = list(all = names(cluster_type),
macrophages = 2,
CD4_T_cells = names(cluster_type[cluster_type == "CD4 T cells"]),
CD8_T_cells = names(cluster_type[cluster_type == "CD8 T cells"]))
list_subsobj = lapply(list_clusters, FUN = function(clusters) {
sobj_ic$seurat_clusters = as.character(sobj_ic$seurat_clusters)
# Extract cells of interest
subsobj = subset(sobj_ic, seurat_clusters %in% clusters)
return(subsobj)
})
names(list_subsobj) = names(list_clusters)
rm(list_clusters)
table_ic = data.frame(clusters = c("all", rep("macrophages", 2),
rep("CD4_T_cells", 3), rep("CD8_T_cells", 2)),
feature = c("IL6", "IL1B", "TNF", "GZMA", "IFNG", "IL17A", "PRF1", "GZMB"))
table_ic
## clusters feature
## 1 all IL6
## 2 macrophages IL1B
## 3 macrophages TNF
## 4 CD4_T_cells GZMA
## 5 CD4_T_cells IFNG
## 6 CD4_T_cells IL17A
## 7 CD8_T_cells PRF1
## 8 CD8_T_cells GZMB
Violin plots split by sample :
plot_list = apply(table_ic, 1, FUN = function(one_row) {
# Input
population = one_row[1]
subsobj = list_subsobj[[population]]
feature = one_row[2]
# Make figure
p = Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
features = feature, cols = sample_info$color) +
ggplot2::labs(title = stringr::str_replace_all(population,
pattern = "_",
replacement = " "),
subtitle = feature) +
ggplot2::theme(plot.subtitle = element_text(hjust = 0.5),
axis.title.x = element_blank(),
legend.position = "none")
return(p)
})
names(plot_list) = table_ic$feature
plot_list
## $IL6
##
## $IL1B
##
## $TNF
##
## $GZMA
##
## $IFNG
##
## $IL17A
##
## $PRF1
##
## $GZMB
Feature plots split by sample type :
plot_list = lapply(table_ic$feature, FUN = function(one_gene) {
p = aquarius::plot_split_dimred(sobj_ic,
reduction = name2D,
split_by = "sample_type",
color_by = one_gene,
color_palette = c("gray70", "#FDBB84", "#EF6548", "#7F0000", "black"),
main_pt_size = 0.6,
bg_pt_size = 0.6,
order = TRUE,
bg_color = "gray95")
p = patchwork::wrap_plots(p, nrow = 1) +
patchwork::plot_layout(guides = "collect") +
ggplot2::theme(legend.position = "right") &
ggplot2::theme(plot.subtitle = element_blank())
return(p)
})
names(plot_list) = table_ic$feature
plot_list
## $IL6
##
## $IL1B
##
## $TNF
##
## $GZMA
##
## $IFNG
##
## $IL17A
##
## $PRF1
##
## $GZMB
Violin plots split by sample type :
plot_list = apply(table_ic, 1, FUN = function(one_row) {
# Input
population = one_row[1]
subsobj = list_subsobj[[population]]
feature = one_row[2]
# t-test between sample type
feature_expr = subsobj@assays$RNA@data[feature, ]
feature_hs = feature_expr[subsobj$sample_type == "HS"]
feature_hd = feature_expr[subsobj$sample_type == "HD"]
feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
feature_hs_VS_feature_hd
# Significance associated with p-value
pval = feature_hs_VS_feature_hd$p.value
significance = case_when(pval > 0.05 ~ "ns",
pval > 0.01 & pval <= 0.05 ~ "*",
pval > 0.001 & pval <= 0.01 ~ "**",
pval <= 0.001 ~ "***")
# Plot
p = Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
features = feature) +
ggplot2::scale_fill_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
ggplot2::labs(title = stringr::str_replace_all(population,
pattern = "_",
replacement = " "),
subtitle = feature) +
# Significance bar
ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
size = 0.5,
x = 1, xend = 2,
y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
x = 1.5, y = max(feature_expr)+0.35,
label = significance,
fill = NA, label.size = 0) +
ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
# Theme
ggplot2::theme(plot.subtitle = element_text(hjust = 0.5),
axis.title.x = element_blank(),
legend.position = "none")
return(p)
})
names(plot_list) = table_ic$feature
plot_list
## $IL6
##
## $IL1B
##
## $TNF
##
## $GZMA
##
## $IFNG
##
## $IL17A
##
## $PRF1
##
## $GZMB
Barplot by cluster type :
quantif_total = table(sobj$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "nb_cells"))
# Number of cytotoxic T cells
quantif_cytotox = sobj_ic@meta.data %>%
dplyr::mutate(cytotoxic = ifelse(seurat_clusters %in% c("5","6"),
yes = "Cytotoxic CD8 T cells",
no = as.character(cluster_type))) %>%
# cells of interest
dplyr::filter(cytotoxic == "Cytotoxic CD8 T cells") %>%
# count by sample
dplyr::select(sample_identifier, cluster_type) %>%
base::droplevels() %>%
table() %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "CellType", "Number")) %>%
dplyr::mutate(isCytotoxic = "cytotoxic")
# Number of immune cells
quantif_immune = table(sobj_ic$sample_identifier, sobj_ic$cluster_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "CellType", "Number")) %>%
dplyr::filter(CellType %in% c("CD4 T cells", "CD8 T cells", "macrophages")) %>%
dplyr::mutate(isCytotoxic = "no") %>%
# join cytotoxic
rbind(., quantif_cytotox) %>%
# proportion for the whole dataset
dplyr::left_join(., quantif_total, by = "Sample") %>%
dplyr::mutate(prop_cells = 100*round(Number/nb_cells,4)) %>%
`colnames<-`(c("Sample", "CellType", "Number", "isCytotoxic", "Total", "Proportion")) %>%
dplyr::mutate(isCytotoxic = factor(isCytotoxic, levels = c("cytotoxic", "no")))
ggplot2::ggplot(data = quantif_immune %>% dplyr::filter(isCytotoxic != "cytotoxic"),
aes(x = Sample, y = Proportion, fill = CellType)) +
ggplot2::geom_bar(stat = "identity", position = position_dodge(width = 0.70),
color = "black", size = 0.25, width = 0.70) +
ggplot2::scale_fill_manual(values = unlist(color_markers[c("CD4 T cells", "CD8 T cells", "macrophages")]),
breaks = names(color_markers[c("CD4 T cells", "CD8 T cells", "macrophages")]),
name = "Cell Type") +
# Overlay a layer for stripe
# ggplot2::geom_bar(data = quantif_immune %>% dplyr::filter(isCytotoxic == "cytotoxic"),
# stat = "identity", position = position_dodge(width = 0.5), fill = "black",
# color = "lightgray", size = 0.25, width = 0.4/3, aes(pattern = isCytotoxic)) +
# ggpattern::geom_col_pattern(data = quantif_immune %>% dplyr::filter(isCytotoxic == "cytotoxic"),
# pattern_density = 0.001,
# pattern_spacing = 0.015,
# pattern_key_scale_factor = 1,
# color = "black", size = 0.25,
# width = 0.37/3,
# pattern_color = "black", # bar border
# show.legend = TRUE) +
# ggpattern::scale_pattern_manual(name = "CD8 T cells", values = "stripe") +
# ggpattern::scale_pattern_angle_manual(name = "CD8 T cells", values = 50) +
# ggplot2::guides(pattern = ggplot2::guide_legend(override.aes = list(fill = "white")),
# fill = ggplot2::guide_legend(override.aes = list(pattern = "none"))) +
# Theme
ggplot2::scale_y_continuous(limits = c(0, max(quantif_immune$Proportion)),
expand = ggplot2::expansion(add = c(0, 1))) +
ggplot2::labs(y = "% of immune cells by sample") +
ggplot2::theme_classic() +
ggplot2::theme(axis.title.x = element_blank(),
axis.line.x = element_line(colour = "lightgray"),
text = element_text(size = 15),
axis.text.x = element_text(angle = 0, hjust = 0.5),
panel.grid.major.y = element_line(color = "lightgray", size = 0.5),
panel.grid.minor.y = element_line(color = "lightgray", size = 0.5))
Heatmap for macrophages :
subsobj = subset(sobj_ic, cluster_type == "macrophages")
features_oi = c("IL1B", "TNF",
"HLA-DQA2", "HLA-DPA1", "HLA-DRB5",
"HLA-A", "HLA-C", "B2M",
"C1QA", "C1QB", "C1QC")
# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1] 11 463
## Colors
list_colors = list()
# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))
# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
dplyr::arrange(sample_type, sample_identifier) %>%
rownames()
column_order = match(column_order, rownames(subsobj@meta.data))
# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
sample_identifier = subsobj$sample_identifier,
col = list(sample_type = list_colors[["sample_type"]],
sample_identifier = list_colors[["sample_identifier"]]))
# Heatmap
ht = Heatmap(as.matrix(mat_expr),
heatmap_legend_param = list(title = "Expression", at = c(0, 1),
labels = c("low", "high")),
col = list_colors[["expression"]],
top_annotation = ha_top,
show_column_names = FALSE,
column_order = column_order,
column_gap = unit(2, "mm"),
cluster_rows = FALSE,
row_title = NULL,
row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
use_raster = FALSE,
show_heatmap_legend = TRUE,
border = TRUE)
ComplexHeatmap::draw(ht,
merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "bottom")
Heatmap for CD4 T cells :
subsobj = subset(sobj_ic, cluster_type == "CD4 T cells")
features_oi = c("GZMA", "KLRB1", "BTG1", "ZFP36", "NFKBIA", "TXNIP", "CXCR4", "IFNG", "IL17A")
# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1] 9 848
## Colors
list_colors = list()
# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))
# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
dplyr::arrange(sample_type, sample_identifier) %>%
rownames()
column_order = match(column_order, rownames(subsobj@meta.data))
# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
sample_identifier = subsobj$sample_identifier,
col = list(sample_type = list_colors[["sample_type"]],
sample_identifier = list_colors[["sample_identifier"]]))
# Heatmap
ht = Heatmap(as.matrix(mat_expr),
heatmap_legend_param = list(title = "Expression", at = c(0, 1),
labels = c("low", "high")),
col = list_colors[["expression"]],
top_annotation = ha_top,
show_column_names = FALSE,
column_order = column_order,
column_gap = unit(2, "mm"),
cluster_rows = FALSE,
row_title = NULL,
row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
use_raster = FALSE,
show_heatmap_legend = TRUE,
border = TRUE)
ComplexHeatmap::draw(ht,
merge_legend = TRUE,
heatmap_legend_side = "left",
annotation_legend_side = "left")
We load the HFSCs dataset :
sobj_hfsc = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_sobj.rds"))
sobj_hfsc
## An object of class Seurat
## 15384 features across 1454 samples within 1 assay
## Active assay: RNA (15384 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_24_tsne, RNA_pca_24_umap, harmony, harmony_24_umap, harmony_24_tsne
This is the projection name to visualize cells :
name2D = "harmony_24_tsne"
To represent results from differential expression, we load the analyses results :
list_results = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_list_results.rds"))
lapply(list_results, FUN = names)
## $cluster_0_8
## [1] "p_val" "avg_logFC" "pct.1" "pct.2" "p_val_adj"
##
## $cluster_2
## [1] "p_val" "avg_logFC" "pct.1" "pct.2" "p_val_adj"
##
## $cluster_1
## [1] "p_val" "avg_logFC" "pct.1" "pct.2" "p_val_adj"
##
## $cluster_3
## [1] "p_val" "avg_logFC" "pct.1" "pct.2" "p_val_adj"
HFSCs on the full atlas :
sobj$is_hfsc = (colnames(sobj) %in% colnames(sobj_hfsc))
Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
group.by = "is_hfsc", order = "TRUE") +
ggplot2::scale_color_manual(values = c(color_markers[["HFSC"]], bg_color),
breaks = c(TRUE, FALSE)) +
ggplot2::labs(title = "HFSCs",
subtitle = paste0(ncol(sobj_hfsc), " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes() + Seurat::NoLegend()
KRT15 expression :
Seurat::FeaturePlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
features = "KRT15") +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() + Seurat::NoLegend()
Genes of interest :
genes = c("TGFB2", "ANGPTL7", "FGF18", "MGP", "EPCAM", "KRT75", "NOTCH3", "PTHLH")
plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
gene = genes[[gene_id]]
sobj_hfsc$my_gene = Seurat::FetchData(sobj_hfsc, gene)[, 1] %>%
aquarius::run_rescale(., new_min = 0, new_max = 10)
Seurat::FeaturePlot(sobj_hfsc, features = "my_gene", reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
breaks = seq(0, 10, by = 2.5),
labels = c("min", rep("", 3), "max")) +
ggplot2::labs(title = gene) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, size = 17),
plot.subtitle = element_text(hjust = 0.5, size = 15),
legend.text = element_text(size = 15),
legend.position = "none") +
Seurat::NoAxes()
})
plot_list
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
Project name :
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_hfsc), replace = FALSE, size = ncol(sobj_hfsc))
# Extract coordinates
cells_coord = sobj_hfsc@reductions[[name2D]]@cell.embeddings %>%
as.data.frame() %>%
`colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj_hfsc$project_name
cells_coord = cells_coord[(rnd_order), ]
# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
ggplot2::geom_point(size = 1.2) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
ggplot2::theme_void() +
ggplot2::theme(aspect.ratio = 1,
legend.position = "none")
Cluster :
Seurat::DimPlot(sobj_hfsc, reduction = name2D, pt.size = 1,
group.by = "seurat_clusters", cols = grey_palette,
label = TRUE, label.size = 7) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Heatmap with proportions :
cluster_markers = c("TGFB2", "ANGPTL7", "EPCAM", "KRT75", "NOTCH3", "PTHLH")
## Bottom annotation : gene expression by cluster
ht_annot = Seurat::FetchData(sobj_hfsc, slot = "data", vars = cluster_markers) %>%
as.data.frame()
ht_annot$clusters = sobj_hfsc$seurat_clusters
ht_annot = ht_annot %>%
dplyr::group_by(clusters) %>%
dplyr::summarise_all(funs('mean' = mean)) %>%
as.data.frame() %>%
dplyr::select(-clusters) %>%
`colnames<-`(c(cluster_markers))
ha_bottom = ComplexHeatmap::HeatmapAnnotation(df = ht_annot,
which = "column",
show_legend = TRUE,
col = setNames(nm = cluster_markers,
lapply(cluster_markers, FUN = color_fun)),
annotation_name_side = "left")
## Right annotation : number of cells by dataset
ht_annot = table(sobj_hfsc$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("sample_identifier", "nb_cells")) %>%
`rownames<-`(.$sample_identifier) %>%
dplyr::select(-sample_identifier)
ha_right = ComplexHeatmap::HeatmapAnnotation(
df = ht_annot,
which = "row",
show_legend = TRUE,
annotation_name_side = "bottom",
col = list(nb_cells = circlize::colorRamp2(colors = RColorBrewer::brewer.pal(name = "Greys", n = 9),
breaks = seq(from = range(ht_annot$nb_cells)[1],
to = range(ht_annot$nb_cells)[2],
length.out = 9))))
## Heatmap
ht = aquarius::plot_prop_heatmap(df = sobj_hfsc@meta.data[, c("sample_identifier", "seurat_clusters")],
bottom_annotation = ha_bottom,
# right_annotation = ha_right,
cluster_rows = TRUE,
column_names_centered = TRUE,
prop_margin = 1,
row_names_gp = grid::gpar(names = sample_info$sample_identifier,
col = sample_info$color,
fontface = "bold"),
row_title = "Sample",
column_title = "Cluster")
ComplexHeatmap::draw(ht,
merge_legend = TRUE,
heatmap_legend_side = "bottom")
Heatmap for cluster 0 and 8 :
subsobj = subset(sobj_hfsc, seurat_clusters %in% c(0,8))
features_oi = rownames(list_results$cluster_0_8)
features_oi = features_oi[!grepl(features_oi, pattern = "^RP")]
# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = cbind(mat_expr, subsobj$percent.mt)
colnames(mat_expr)[ncol(mat_expr)] = "percent.rb"
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1] 48 631
## Colors
list_colors = list()
# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))
# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
sample_info$color)
# list_colors[["seurat_clusters"]] = setNames(aquarius::gg_color_hue(length(levels(subsobj$seurat_clusters))),
# nm = levels(subsobj$seurat_clusters))
# Cells order
column_order = subsobj@meta.data %>%
dplyr::arrange(sample_type, sample_identifier) %>%
rownames()
column_order = match(column_order, rownames(subsobj@meta.data))
# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
sample_identifier = subsobj$sample_identifier,
# clusters = subsobj$seurat_clusters,
col = list(sample_type = list_colors[["sample_type"]],
sample_identifier = list_colors[["sample_identifier"]]
# clusters = list_colors[["seurat_clusters"]]
))
# g1 : REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM
# g2 : GOBP_APOPTOTIC_PROCESS
g1_genes = c("B2M", "HLA-C", "HLA-A", "MIF", "PPIA", "JUNB", "IFITM3")
g2_genes = c("Jun", "ATF3", "BTG2", "RHOB", "NFKBIA", "SGK1", "KLF9",
"CAV1", "DDIT4", "PDK4", "TXNIP", "RNF1152", "TLE1")
ha_right = data.frame(genes = c(features_oi, "percent.rb"), rownames = c(features_oi, "percent.rb"))
ha_right$group = case_when(ha_right$genes %in% g1_genes ~ "REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM",
ha_right$genes %in% g2_genes ~ "GOBP_APOPTOTIC_PROCESS",
TRUE ~ "others")
list_colors[["group"]] = setNames(
nm = c("REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM", "GOBP_APOPTOTIC_PROCESS", "others"),
c("red", "black", "gray90"))
ha_right = HeatmapAnnotation(group = ha_right$group,
col = list(group = list_colors[["group"]]),
which = "row",
show_annotation_name = FALSE,
show_legend = TRUE)
# Heatmap
ht = Heatmap(as.matrix(mat_expr),
heatmap_legend_param = list(title = "Expression", at = c(0, 1),
labels = c("low", "high")),
col = list_colors[["expression"]],
top_annotation = ha_top,
right_annotation = ha_right,
show_column_names = FALSE,
column_order = column_order,
column_gap = unit(2, "mm"),
cluster_rows = FALSE,
row_title = NULL,
row_names_gp = grid::gpar(fontsize = 10, fontface = "plain"),
use_raster = FALSE,
show_heatmap_legend = TRUE,
border = TRUE)
ComplexHeatmap::draw(ht,
merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "bottom")
Violin plots split by sample :
features_oi = c("IFITM3", "DDIT4")
plot_list = lapply(features_oi, FUN = function(feature) {
p = Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
features = feature, cols = sample_info$color) +
ggplot2::theme(axis.title.x = element_blank(),
legend.position = "none")
return(p)
})
names(plot_list) = features_oi
plot_list
## $IFITM3
##
## $DDIT4
Feature plots split by sample type :
plot_list = lapply(features_oi, FUN = function(one_gene) {
p = aquarius::plot_split_dimred(sobj_hfsc,
reduction = name2D,
split_by = "sample_type",
color_by = one_gene,
color_palette = c("gray70", "#FDBB84", "#EF6548", "#7F0000", "black"),
main_pt_size = 0.6,
bg_pt_size = 0.6,
order = TRUE,
bg_color = "gray95")
p = patchwork::wrap_plots(p, nrow = 1) +
patchwork::plot_layout(guides = "collect") +
ggplot2::theme(legend.position = "right") &
ggplot2::theme(plot.subtitle = element_blank())
return(p)
})
names(plot_list) = features_oi
plot_list
## $IFITM3
##
## $DDIT4
Violin plots split by sample type :
plot_list = lapply(features_oi, FUN = function(feature) {
# t-test between sample type
feature_expr = subsobj@assays$RNA@data[feature, ]
feature_hs = feature_expr[subsobj$sample_type == "HS"]
feature_hd = feature_expr[subsobj$sample_type == "HD"]
feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
feature_hs_VS_feature_hd
# Significance associated with p-value
pval = feature_hs_VS_feature_hd$p.value
significance = case_when(pval > 0.05 ~ "ns",
pval > 0.01 & pval <= 0.05 ~ "*",
pval > 0.001 & pval <= 0.01 ~ "**",
pval <= 0.001 ~ "***")
# Plot
p = Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
features = feature) +
ggplot2::scale_fill_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
# Significance bar
ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
size = 0.5,
x = 1, xend = 2,
y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
x = 1.5, y = max(feature_expr)+0.35,
label = significance,
fill = NA, label.size = 0) +
ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
# Theme
ggplot2::theme(axis.title.x = element_blank(),
legend.position = "none")
return(p)
})
names(plot_list) = features_oi
plot_list
## $IFITM3
##
## $DDIT4
Barplot with number of HFSCs and total number of cells :
quantif = dplyr::left_join(
x = table(sobj$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "nb_cells")),
y = table(sobj_hfsc$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "nb_hfsc")),
by = "Sample") %>%
dplyr::mutate(prop_hfsc = round(100*nb_hfsc / nb_cells, 2))
quantif_to_plot = rbind.data.frame(
data.frame(Sample = quantif$Sample,
nb_cells = quantif$nb_cells - quantif$nb_hfsc,
cell_type = "others",
stringsAsFactors = FALSE),
data.frame(Sample = quantif$Sample,
nb_cells = quantif$nb_hfsc,
cell_type = "hfsc",
stringsAsFactors = FALSE)) %>%
dplyr::mutate(cell_type = factor(cell_type, levels = c("others", "hfsc")))
aquarius::plot_barplot(df = quantif_to_plot,
x = "Sample", y = "nb_cells", fill = "cell_type",
position = position_fill()) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = .data$Sample, y = 0.05+.data$prop_hfsc/100, label = .data$nb_hfsc),
label.size = 0, size = 5, fill = NA) +
ggplot2::scale_fill_manual(values = c("gray90", color_markers[["HFSC"]]),
breaks = c("others", "hfsc"),
name = "Cell Type") +
ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
labels = paste0(seq(0, 100, by = 25), sep = " %"),
expand = ggplot2::expansion(add = c(0, 0.05))) +
ggplot2::theme(axis.title.y = element_blank(),
axis.line.x = element_line(colour = "lightgray"),
text = element_text(size = 15),
legend.position = "none")
We load the IBL + ORS dataset :
sobj_iblors = readRDS(paste0(data_dir, "/4_zoom/3_zoom_iblmors/iblmors_sobj.rds"))
sobj_iblors
## An object of class Seurat
## 16701 features across 3532 samples within 1 assay
## Active assay: RNA (16701 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne
This is the projection name to visualize cells :
name2D = "harmony_20_tsne"
To represent results from differential expression, we load the analyses results :
list_results = readRDS(paste0(data_dir, "/4_zoom/3_zoom_iblmors/iblmors_list_results.rds"))
lapply(list_results, FUN = names)
## $IBL_vs_ORS
## [1] "mark" "enrichr_ibl" "enrichr_ors" "gsea"
##
## $cluster5_vs_ORS
## [1] "mark" "enrichr_up" "enrichr_dn" "gsea"
##
## $IBL_HS_vs_HD
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
##
## $ORS_HS_vs_HD
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
We defined cluster type :
cluster_type = table(sobj_iblors$cell_type, sobj_iblors$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(sobj_iblors$cell_type)[cluster_type])
sobj_iblors$cluster_type = cluster_type[sobj_iblors$seurat_clusters]
sobj_iblors$cluster_type = factor(sobj_iblors$cluster_type,
levels = c("IBL", "ORS"))
IBL + ORS on the full atlas :
sobj$cell_bc = colnames(sobj)
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj$is_iblors = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
by = "cell_bc")[, "cluster_type"]
Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
group.by = "is_iblors", order = "TRUE") +
ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS")], bg_color),
breaks = c("IBL", "ORS", NA), na.value = bg_color) +
ggplot2::labs(title = "IBL + ORS",
subtitle = paste0(ncol(sobj_iblors), " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes() + Seurat::NoLegend()
Project name :
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_iblors), replace = FALSE, size = ncol(sobj_iblors))
# Extract coordinates
cells_coord = sobj_iblors@reductions[[name2D]]@cell.embeddings %>%
as.data.frame() %>%
`colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj_iblors$project_name
cells_coord = cells_coord[(rnd_order), ]
# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
ggplot2::geom_point(size = 1.2) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
ggplot2::theme_void() +
ggplot2::theme(aspect.ratio = 1,
legend.position = "none")
Cluster :
Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
group.by = "seurat_clusters", cols = grey_palette,
label = TRUE, label.size = 8) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Cluster type :
Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
group.by = "cluster_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Cluster type split by sample type :
plot_list = aquarius::plot_split_dimred(sobj_iblors, reduction = name2D,
group_by = "cluster_type",
group_color = color_markers,
split_by = "sample_type",
bg_pt_size = 1, main_pt_size = 1,
bg_color = bg_color)
patchwork::wrap_plots(plot_list) &
Seurat::NoLegend()
Number of cells by cluster and by sample :
table(sobj_iblors$seurat_clusters,
sobj_iblors$sample_identifier)
##
## HS_1 HS_2 HS_3 HS_4 HS_5 HD_1 HD_2
## 0 49 15 18 354 82 68 83
## 1 14 0 28 205 49 68 74
## 2 67 17 191 19 38 41 29
## 3 40 18 2 204 24 39 57
## 4 14 2 76 18 215 1 36
## 5 84 83 1 69 28 20 17
## 6 40 9 108 23 76 26 20
## 7 27 7 51 90 21 10 23
## 8 28 7 77 11 20 28 13
## 9 11 4 41 11 73 10 8
## 10 16 3 37 6 26 10 4
table(sobj_iblors$cluster_type,
sobj_iblors$sample_identifier)
##
## HS_1 HS_2 HS_3 HS_4 HS_5 HD_1 HD_2
## IBL 176 42 530 88 448 116 110
## ORS 214 123 100 922 204 205 254
Separate cluster 5 :
sobj_iblors$cluster_type_sep5 = ifelse(sobj_iblors$seurat_clusters == 5,
yes = "ORS_5",
no = as.character(sobj_iblors$cluster_type)) %>%
as.factor()
table(sobj_iblors$cluster_type_sep5,
sobj_iblors$sample_type)
##
## HS HD
## IBL 1284 226
## ORS 1298 422
## ORS_5 265 37
Barplot by cluster category :
quantif = table(sobj_iblors$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "nb_cells"))
quantif_to_plot = table(sobj_iblors$sample_identifier,
sobj_iblors$cluster_type_sep5) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "CellType", "Number")) %>%
dplyr::mutate(Style = ifelse(CellType == "ORS_5", yes = "IL1R2+", no = "IL1R2-")) %>%
dplyr::mutate(Style = factor(Style, levels = c("IL1R2-", "IL1R2+"))) %>%
dplyr::mutate(CellType = ifelse(CellType == "ORS_5", yes = "ORS", no = as.character(CellType))) %>%
`colnames<-`(c("Sample", "Cell Type", "Number", "IL1R2 status"))
aquarius::plot_barplot(df = quantif_to_plot,
x = "Sample", y = "Number",
fill = "Cell Type", pattern = "IL1R2 status",
position = position_fill()) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
label.size = 0, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers[levels(sobj_iblors$cluster_type)]),
breaks = names(color_markers[levels(sobj_iblors$cluster_type)]),
name = "Cell Type") +
ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
labels = paste0(seq(0, 100, by = 25), sep = " %"),
expand = ggplot2::expansion(add = c(0, 0.05))) +
ggplot2::theme(axis.title.y = element_blank(),
axis.line.x = element_line(colour = "lightgray"),
text = element_text(size = 15),
legend.position = "right")
Focus on cluster 5, among ORS :
subsobj = subset(sobj_iblors, cluster_type == "ORS")
subsobj$cluster_type_sep5 = ifelse(subsobj$seurat_clusters == 5,
yes = "ORS_5",
no = as.character(subsobj$cluster_type)) %>%
as.factor()
quantif = table(subsobj$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "nb_cells"))
quantif_to_plot = table(subsobj$sample_identifier,
subsobj$cluster_type_sep5) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "CellType", "Number")) %>%
dplyr::mutate(Style = ifelse(CellType == "ORS_5", yes = "IL1R2+", no = "IL1R2-")) %>%
dplyr::mutate(Style = factor(Style, levels = c("IL1R2-", "IL1R2+"))) %>%
dplyr::mutate(CellType = ifelse(CellType == "ORS_5", yes = "ORS", no = as.character(CellType))) %>%
`colnames<-`(c("Sample", "Cell Type", "Number", "IL1R2 status")) %>%
dplyr::left_join(., y = quantif, by = "Sample") %>%
dplyr::mutate(prop_cluster5 = round(Number/nb_cells, 4))
aquarius::plot_barplot(df = quantif_to_plot,
x = "Sample", y = "Number",
fill = "IL1R2 status",
position = position_fill()) +
# ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
# aes(x = .data$Sample, y = 1.05, label = .data$nb_cells),
# label.size = 0, size = 5) +
ggplot2::geom_label(data = quantif_to_plot %>%
dplyr::filter(`IL1R2 status` == "IL1R2+"),
inherit.aes = FALSE,
aes(x = .data$Sample, y = prop_cluster5 + 0.05, label = .data$Number),
label.size = 0, size = 5, fill = NA, col = "white") +
ggplot2::scale_fill_manual(values = c("black", color_markers[["ORS"]]),
breaks = c("IL1R2+", "IL1R2-"),
name = "IL1R2 status") +
ggplot2::scale_y_continuous(breaks = seq(0, 1, by = 0.25),
labels = paste0(seq(0, 100, by = 25), sep = " %"),
expand = ggplot2::expansion(add = c(0, 0.05))) +
ggplot2::theme(axis.title.y = element_blank(),
axis.line.x = element_line(colour = "lightgray"),
text = element_text(size = 15),
legend.position = "right")
DE genes between IBL and ORS :
mark = list_results$IBL_vs_ORS$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
# up-regulated in IBL
mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
# up-regulated in ORS
mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
# representative and selective for IBL
mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
# representative and selective for ORS
mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]
avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))
ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
ggplot2::geom_point() +
ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf,
aes(x = pct.1, y = pct.2, label = gene_name),
col = "black", fill = NA, size = 3.5, label.size = NA) +
ggplot2::labs(x = "Enriched in IBL",
y = "Enriched in ORS") +
ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
values = scales::rescale(unname(avg_logFC_range))) +
ggplot2::theme_classic() +
ggplot2::theme(aspect.ratio = 1)
GSEA plot :
the_gs_name = "REACTOME_KERATINIZATION"
the_content = list_results$IBL_vs_ORS$gsea@result %>%
dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
"\t|\tpvalue : ", round(the_content$pvalue, 4),
"\t|\tset size : ", the_content$setSize, " genes")
enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
geneSetID = the_gs_name) +
ggplot2::labs(title = the_gs_name,
subtitle = the_subtitle) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
margin = ggplot2::margin(3, 3, 5, 3)),
plot.subtitle = ggtext::element_markdown(hjust = 0.5,
size = 10))
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE"
the_content = list_results$IBL_vs_ORS$gsea@result %>%
dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
"\t|\tpvalue : ", round(the_content$pvalue, 4),
"\t|\tset size : ", the_content$setSize, " genes")
enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
geneSetID = the_gs_name) +
ggplot2::labs(title = the_gs_name,
subtitle = the_subtitle) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
margin = ggplot2::margin(3, 3, 5, 3)),
plot.subtitle = ggtext::element_markdown(hjust = 0.5,
size = 10))
Score for both gene sets, in all cells :
significant = function(pval) {
signif = case_when(pval > 0.05 ~ "ns",
pval > 0.01 & pval <= 0.05 ~ "*",
pval > 0.001 & pval <= 0.01 ~ "**",
pval <= 0.001 ~ "***")
return(signif)
}
plot_vln_score = function(score_name, the_gs_name) {
# t-test between sample type
feature_expr = sobj_iblors@meta.data[, score_name]
## in mORS
feature_hs = feature_expr[sobj_iblors$sample_type == "HS" & sobj_iblors$cluster_type == "ORS"]
feature_hd = feature_expr[sobj_iblors$sample_type == "HD" & sobj_iblors$cluster_type == "ORS"]
feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
feature_hs_VS_feature_hd
pval_ors = feature_hs_VS_feature_hd$p.value
## in IBL
feature_hs = feature_expr[sobj_iblors$sample_type == "HS" & sobj_iblors$cluster_type == "IBL"]
feature_hd = feature_expr[sobj_iblors$sample_type == "HD" & sobj_iblors$cluster_type == "IBL"]
feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
feature_hs_VS_feature_hd
pval_ibl = feature_hs_VS_feature_hd$p.value
# Significance associated with p-value
significance_ors = significant(pval_ors)
significance_ibl = significant(pval_ibl)
# Plot
p = Seurat::VlnPlot(sobj_iblors, features = score_name, pt.size = 0.05,
split.by = "sample_type", group.by = "cluster_type") +
ggplot2::scale_fill_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
# Significance bar (IBL)
ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
size = 0.5,
x = 0.75, xend = 1.25,
y = max(feature_expr)*1.2, yend = max(feature_expr)*1.2) +
ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
x = 1, y = max(feature_expr)*1.25,
label = significance_ibl,
fill = NA, label.size = 0) +
# Significance bar (mORS)
ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
size = 0.5,
x = 1.75, xend = 2.25,
y = max(feature_expr)*1.2, yend = max(feature_expr)*1.2) +
ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
x = 2, y = max(feature_expr)*1.25,
label = significance_ors,
fill = NA, label.size = 0) +
ggplot2::lims(y = c(0, 1.3*max(feature_expr))) +
# Style
ggplot2::labs(title = the_gs_name) +
ggplot2::theme(axis.title.x = element_blank(),
legend.position = "none")
return(p)
}
gene_sets = aquarius::get_gene_sets(species = "Homo sapiens")
# Genes in the gene set of interest
the_gs_name = "REACTOME_KERATINIZATION"
the_gs_content = gene_sets$gene_sets_full %>%
dplyr::filter(gs_name == the_gs_name) %>%
dplyr::pull(gene_symbol) %>%
unlist()
# Attribute a score
sobj_iblors$score_kera = Seurat::AddModuleScore(sobj_iblors,
features = list(the_gs_content))$Cluster1
# Violin plot
plot_vln_score("score_kera", the_gs_name)
# Genes in the gene set of interest
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE"
the_gs_content = gene_sets$gene_sets_full %>%
dplyr::filter(gs_name == the_gs_name) %>%
dplyr::pull(gene_symbol) %>%
unlist()
# Attribute a score
sobj_iblors$score_ifng = Seurat::AddModuleScore(sobj_iblors,
features = list(the_gs_content))$Cluster1
# Violin plot
plot_vln_score("score_ifng", the_gs_name)
Violin plot for IBL :
subsobj = subset(sobj_iblors, cluster_type == "IBL")
features_oi = c("DUSP1", "DDIT4", "MIF", "LGALS7", "ARF5", "S100A9")
# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
# t-test between sample type
feature_expr = subsobj@assays$RNA@data[feature, ]
feature_hs = feature_expr[subsobj$sample_type == "HS"]
feature_hd = feature_expr[subsobj$sample_type == "HD"]
feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
feature_hs_VS_feature_hd
# Significance associated with p-value
pval = feature_hs_VS_feature_hd$p.value
significance = case_when(pval > 0.05 ~ "ns",
pval > 0.01 & pval <= 0.05 ~ "*",
pval > 0.001 & pval <= 0.01 ~ "**",
pval <= 0.001 ~ "***")
# Plot
p = Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
features = feature) +
ggplot2::scale_fill_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
# Significance bar
ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
size = 0.5,
x = 1, xend = 2,
y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
x = 1.5, y = max(feature_expr)+0.35,
label = significance,
fill = NA, label.size = 0) +
ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
# Theme
ggplot2::theme(axis.title.x = element_blank(),
legend.position = "none")
return(p)
})
names(plot_list) = features_oi
plot_list
## $DUSP1
##
## $DDIT4
##
## $MIF
##
## $LGALS7
##
## $ARF5
##
## $S100A9
Violin plot for ORS :
subsobj = subset(sobj_iblors, cluster_type == "ORS")
features_oi = c("DUSP1", "KLF6", "CLDN1", "CTGF",
"S100A9", "CCL2", "IFITM3", "IFI27")
# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
# t-test between sample type
feature_expr = subsobj@assays$RNA@data[feature, ]
feature_hs = feature_expr[subsobj$sample_type == "HS"]
feature_hd = feature_expr[subsobj$sample_type == "HD"]
feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
feature_hs_VS_feature_hd
# Significance associated with p-value
pval = feature_hs_VS_feature_hd$p.value
significance = case_when(pval > 0.05 ~ "ns",
pval > 0.01 & pval <= 0.05 ~ "*",
pval > 0.001 & pval <= 0.01 ~ "**",
pval <= 0.001 ~ "***")
# Plot
p = Seurat::VlnPlot(subsobj, group.by = "sample_type", pt.size = 0.3,
features = feature) +
ggplot2::scale_fill_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
# Significance bar
ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
size = 0.5,
x = 1, xend = 2,
y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
x = 1.5, y = max(feature_expr)+0.35,
label = significance,
fill = NA, label.size = 0) +
ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
# Theme
ggplot2::theme(axis.title.x = element_blank(),
legend.position = "none")
return(p)
})
names(plot_list) = features_oi
plot_list
## $DUSP1
##
## $KLF6
##
## $CLDN1
##
## $CTGF
##
## $S100A9
##
## $CCL2
##
## $IFITM3
##
## $IFI27
Split by sample :
Seurat::VlnPlot(subsobj, group.by = "sample_identifier",
features = "IFI27", cols = sample_info$color) +
ggplot2::theme(axis.title.x = element_blank(),
legend.position = "none")
Heatmap for cluster 5 vs other ORS :
subsobj = subset(sobj_iblors, cluster_type == "ORS")
features_oi = c("YBX3", "TXNIP", "KRT14", "KRT15", "NEAT1",
"FXYD3", "MT2A", "MT1E", "MT1X", "AQP3", "GLUL",
# "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
"FOS", "JUNB", "DUSP1", "ZFP36", "NFKBIZ",
"ATF3", "RHOB", "ETS2", "IL18", "KLF4", "KLF6", "KLF9",
"KLF3", "KLF5", "COL17A1", "THSD4", "WNT3", "WNT4", "SLPI", "PLAT",
"LAMB4", "DCN", "SPINK5",
"GSTM3", "ALDH3A1", "LGALS7B", "SLC38A2", "EHF", "CLEC2B",
"IL20RB", "IL1R2", "IFI27", "CXCL14", "HLA-C", "GPSM2", "DAAM1", "ID1",
"RNASET2", "HOPX", "POU3F1", "SPRY1", "AR", "PDGFC",
"WFDC2", "WFDC5", "TSC22D3", "FGFR3", "LY6D", "IGFBP3",
# Other ORS
"APOE", "CTSB", "CALD1", "SOX4",
"STMN1", "LMO4", "CEBPB", "TMEM45A", "GPX2", "C1QTNF12", "GJB6",
"KRT6A", "KRT17", "RBP1", "CALML3", "PTN", "DAPK2",
"EGLN3", "FILIP1L", "ADGRL3", "FST", "EFNB2", "SEMA5A",
"FGFR1", "EGR2", "CLDN1", "DEFB1", "CARD18", "MGST1")
# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1] 89 2022
## Colors
list_colors = list()
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
sample_info$color)
list_colors[["population"]] = setNames(nm = c("IL1R2+ ORS", "other ORS"),
c("black", color_markers["ORS"]))
list_colors[["nFeature_RNA"]] = circlize::colorRamp2(breaks = seq(from = min(subsobj$nFeature_RNA),
to = max(subsobj$nFeature_RNA),
length.out = 9),
colors = RColorBrewer::brewer.pal(name = "Greys", n = 9))
# Cells order
column_order = subsobj@meta.data %>%
dplyr::mutate(seurat_clusters = factor(seurat_clusters, levels = c(5, 3, 0, 1, 7))) %>%
dplyr::arrange(sample_type, seurat_clusters, sample_identifier) %>%
rownames()
column_order = match(column_order, rownames(subsobj@meta.data))
# Annotation
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
sample_identifier = subsobj$sample_identifier,
population = ifelse(subsobj$cluster_type_sep5 == "ORS",
yes = "other ORS", no = "IL1R2+ ORS"),
col = list(sample_type = list_colors[["sample_type"]],
sample_identifier = list_colors[["sample_identifier"]],
population = list_colors[["population"]]))
ha_bottom = HeatmapAnnotation(nFeature_RNA = subsobj$nFeature_RNA,
col = list(nFeature_RNA = list_colors[["nFeature_RNA"]]))
# Heatmap
ht = Heatmap(as.matrix(mat_expr),
heatmap_legend_param = list(title = "Expression", at = c(0, 1),
labels = c("low", "high")),
col = list_colors[["expression"]],
top_annotation = ha_top,
bottom_annotation = ha_bottom,
# Cell grouping
column_split = subsobj$sample_type %>% as.character(),
cluster_columns = FALSE,
column_order = column_order,
column_title = NULL,
show_column_dend = FALSE,
show_column_names = FALSE,
# Genes
cluster_rows = FALSE,
row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
# Style
use_raster = FALSE,
show_heatmap_legend = TRUE,
border = TRUE)
ComplexHeatmap::draw(ht,
merge_legend = TRUE,
heatmap_legend_side = "right",
annotation_legend_side = "right")
Genes of interest :
genes = c("KRT16", "COL17A1", "DST", "KRT6B", "IL1R2", "WNT3",
"IFI27", "CXCL14", "IGFBP3", "KRT15", "CD200", "IL18")
plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
gene = genes[[gene_id]]
sobj_iblors$my_gene = Seurat::FetchData(sobj_iblors, gene)[, 1] %>%
aquarius::run_rescale(., new_min = 0, new_max = 10)
Seurat::FeaturePlot(sobj_iblors, features = "my_gene", reduction = name2D, pt.size = 0.25) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
breaks = seq(0, 10, by = 2.5),
labels = c("min", rep("", 3), "max")) +
ggplot2::labs(title = gene) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, size = 17),
plot.subtitle = element_text(hjust = 0.5, size = 15),
legend.text = element_text(size = 15),
legend.position = "none") +
Seurat::NoAxes()
})
names(plot_list) = genes
plot_list
## $KRT16
##
## $COL17A1
##
## $DST
##
## $KRT6B
##
## $IL1R2
##
## $WNT3
##
## $IFI27
##
## $CXCL14
##
## $IGFBP3
##
## $KRT15
##
## $CD200
##
## $IL18
We load the merged dataset :
sobj_traj = readRDS(paste0(data_dir, "/4_zoom/4_zoom_hfsc_iblmors/hfsc_iblmors_sobj_traj_tinga.rds"))
sobj_traj
## An object of class Seurat
## 17050 features across 4986 samples within 1 assay
## Active assay: RNA (17050 features, 2000 variable features)
## 8 dimensional reductions calculated: RNA_pca, RNA_pca_18_tsne, RNA_pca_18_umap, harmony, harmony_18_umap, harmony_18_tsne, harmony_dm, harmony_dm_5_umap
This is the projection name to visualize cells :
name2D = "harmony_dm"
We load the trajectory object for visualisation purpose :
my_traj = readRDS(paste0(data_dir, "/4_zoom/4_zoom_hfsc_iblmors/hfsc_iblmors_my_traj_tinga.rds"))
class(my_traj)
## [1] "dynwrap::with_dimred" "dynwrap::with_trajectory"
## [3] "dynwrap::data_wrapper" "list"
We defined cell type based on individual object :
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj_traj$cell_bc = colnames(sobj_traj)
sobj_traj$cluster_type = dplyr::left_join(sobj_traj@meta.data[, c("cell_bc", "percent.mt")],
sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
by = "cell_bc")[, "cluster_type"] %>% as.character()
sobj_traj$cluster_type = ifelse(colnames(sobj_traj) %in% colnames(sobj_hfsc),
yes = "HFSC",
no = sobj_traj$cluster_type) %>%
as.factor()
Cells on the full atlas :
sobj$cell_bc = colnames(sobj)
sobj_traj$cell_bc = colnames(sobj_traj)
sobj$is_traj = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
sobj_traj@meta.data[, c("cell_bc", "cluster_type")],
by = "cell_bc")[, "cluster_type"]
Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
group.by = "is_traj", order = levels(sobj_traj$cluster_type)) +
ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS", "HFSC")], bg_color),
breaks = c("IBL", "ORS", "HFSC", NA), na.value = bg_color) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() + Seurat::NoLegend()
Project name :
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj_traj), replace = FALSE, size = ncol(sobj_traj))
# Extract coordinates
cells_coord = sobj_traj@reductions[[name2D]]@cell.embeddings %>%
as.data.frame() %>%
`colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_type = sobj_traj$sample_type
cells_coord = cells_coord[order(sobj_traj$sample_type), ]
# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_type)) +
ggplot2::geom_point(size = 1.2) +
ggplot2::scale_color_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
ggplot2::theme_void() +
ggplot2::theme(aspect.ratio = 1,
legend.position = "none")
Cluster type :
Seurat::DimPlot(sobj_traj, reduction = name2D, pt.size = 0.5,
group.by = "cluster_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Pseudotime :
Seurat::FeaturePlot(sobj_traj, reduction = name2D, pt.size = 0.5,
features = "pseudotime") +
ggplot2::scale_color_gradientn(colors = viridis::viridis(n = 100)) +
ggplot2::lims(x = range(sobj_traj@reductions[[name2D]]@cell.embeddings[, 1]),
y = range(sobj_traj@reductions[[name2D]]@cell.embeddings[, 2])) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_blank()) +
Seurat::NoAxes()
Pseudotime with dynplot’s function :
dynplot::plot_dimred(trajectory = my_traj,
dimred = sobj_traj[[name2D]]@cell.embeddings,
# Cells
color_cells = 'pseudotime',
size_cells = 1.6,
border_radius_percentage = 0,
# Trajectory
plot_trajectory = TRUE,
color_trajectory = "none",
label_milestones = FALSE,
size_milestones = 0,
size_transitions = 1)
We load the immune cells dataset :
sobj_mat = readRDS(paste0(data_dir, "/4_zoom/5_zoom_matrix/matrix_sobj.rds"))
sobj_mat
## An object of class Seurat
## 15937 features across 1529 samples within 1 assay
## Active assay: RNA (15937 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_19_tsne, RNA_pca_19_umap, harmony, harmony_19_umap, harmony_19_tsne
This is the projection name to visualize cells :
name2D = "harmony_19_tsne"
To represent results from differential expression, we load the analyses results :
list_results = readRDS(paste0(data_dir, "/4_zoom/5_zoom_matrix/matrix_list_results.rds"))
lapply(list_results, FUN = names)
## $IRS
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
##
## $medulla
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
##
## $cortex
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
##
## $cuticle
## [1] "mark" "enrichr_hs" "enrichr_hd" "gsea"
We defined factor levels for cluster type :
sobj_mat$cluster_type = factor(sobj_mat$cluster_type,
levels = c("medulla", "cortex", "IRS", "cuticle"))
table(sobj_mat$cluster_type, sobj_mat$sample_identifier)
##
## HS_1 HS_2 HS_3 HS_4 HS_5 HD_1 HD_2
## medulla 6 7 24 117 36 41 101
## cortex 6 1 45 41 115 32 73
## IRS 1 1 12 20 47 15 52
## cuticle 14 11 86 166 197 55 207
Matrix cells on the full atlas :
sobj$is_matrix = (colnames(sobj) %in% colnames(sobj_mat))
Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
group.by = "is_matrix", order = "TRUE") +
ggplot2::scale_color_manual(values = c(category_color[["matrix"]], bg_color),
breaks = c(TRUE, FALSE)) +
ggplot2::labs(title = "Matrix cells",
subtitle = paste0(ncol(sobj_mat), " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes() + Seurat::NoLegend()
Cluster type :
Seurat::DimPlot(sobj_mat, reduction = name2D, pt.size = 1,
group.by = "cluster_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Clustering :
Seurat::DimPlot(sobj_mat, reduction = name2D, pt.size = 1,
group.by = "seurat_clusters", label = TRUE) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Barplot with proportion :
quantif_total = table(sobj$sample_identifier) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "nb_cells"))
# Number of matrix cells
quantif_immune = table(sobj_mat$sample_identifier, sobj_mat$cluster_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "CellType", "Number")) %>%
# proportion for the whole dataset
dplyr::left_join(., quantif_total, by = "Sample") %>%
dplyr::mutate(prop_cells = 100*round(Number/nb_cells,4)) %>%
`colnames<-`(c("Sample", "CellType", "Number", "Total", "Proportion"))
ggplot2::ggplot(data = quantif_immune,
aes(x = Sample, y = Proportion, fill = CellType)) +
ggplot2::geom_bar(stat = "identity", position = position_dodge(width = 0.7),
color = "black", size = 0.25, width = 0.7) +
ggplot2::scale_fill_manual(values = unlist(color_markers[levels(sobj_mat$cluster_type)]),
breaks = names(color_markers[levels(sobj_mat$cluster_type)]),
name = "Cell Type") +
# Theme
ggplot2::scale_y_continuous(limits = c(0, max(quantif_immune$Proportion)),
expand = ggplot2::expansion(add = c(0, 1))) +
ggplot2::labs(y = "% of matrix cells by sample") +
ggplot2::theme_classic() +
ggplot2::theme(axis.title.x = element_blank(),
axis.line.x = element_line(colour = "lightgray"),
text = element_text(size = 15),
axis.text.x = element_text(angle = 0, hjust = 0.5),
panel.grid.major.y = element_line(color = "lightgray", size = 0.5),
panel.grid.minor.y = element_line(color = "lightgray", size = 0.5))
GSEA plots :
the_gs_name = "GOCC_KERATIN_FILAMENT"
the_content = list_results$cortex$gsea@result %>%
dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
"\t|\tpvalue : ", round(the_content$pvalue, 4),
"\t|\tset size : ", the_content$setSize, " genes")
enrichplot::gseaplot2(x = list_results$cortex$gsea,
geneSetID = the_gs_name) +
ggplot2::labs(title = the_gs_name,
subtitle = the_subtitle) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
margin = ggplot2::margin(3, 3, 5, 3)),
plot.subtitle = ggtext::element_markdown(hjust = 0.5,
size = 10))
the_gs_name = "GOBP_INTERMEDIATE_FILAMENT_ORGANIZATION"
the_content = list_results$cortex$gsea@result %>%
dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
"\t|\tpvalue : ", round(the_content$pvalue, 4),
"\t|\tset size : ", the_content$setSize, " genes")
enrichplot::gseaplot2(x = list_results$cortex$gsea,
geneSetID = the_gs_name) +
ggplot2::labs(title = the_gs_name,
subtitle = the_subtitle) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
margin = ggplot2::margin(3, 3, 5, 3)),
plot.subtitle = ggtext::element_markdown(hjust = 0.5,
size = 10))
Violin plot
features_oi = c("KRT32")
plot_list = lapply(features_oi, FUN = function(feature) {
feature_expr = Seurat::FetchData(sobj_mat, feature)[, 1]
y_max = max(feature_expr)*1.1
# Dataframe stat
pop_oi = levels(sobj_mat$cluster_type)
df_stat = lapply(c(1:length(pop_oi)), FUN = function(i) {
pop = pop_oi[i]
# Stat
feature_hs = feature_expr[sobj_mat$sample_type == "HS" & sobj_mat$cluster_type == pop]
feature_hd = feature_expr[sobj_mat$sample_type == "HD" & sobj_mat$cluster_type == pop]
feature_hs_vs_hd = stats::t.test(feature_hs, feature_hd)
significance = significant(feature_hs_vs_hd$p.value)
# Output (x, pop, signif)
row_i = c(i, pop, significance)
return(row_i)
}) %>% do.call(rbind.data.frame, .) %>%
`colnames<-`(c("x", "pop", "signif")) %>%
dplyr::mutate(x = as.numeric(x))
# Plot
p = Seurat::VlnPlot(sobj_mat, features = feature,
group.by = "cluster_type",
split.by = "sample_type") +
ggplot2::scale_fill_manual(values = sample_type_colors,
breaks = names(sample_type_colors)) +
# Stat
ggplot2::geom_segment(data = df_stat, inherit.aes = FALSE, size = 0.5,
mapping = aes(x = x - 0.25, xend = x + 0.25),
y = y_max, yend = y_max) +
ggplot2::geom_label(data = df_stat, inherit.aes = FALSE,
mapping = aes(x = pop, label = signif),
y = y_max*1.05,
fill = NA, label.size = 0) +
# Theme
ggplot2::lims(y = c(0, y_max*1.1)) +
ggplot2::theme(axis.title.x = element_blank(),
legend.position = "right")
return(p)
})
names(plot_list) = features_oi
plot_list
## $KRT32
We load the dataset containing all cells :
sobj = readRDS(paste0(data_dir, "/5_wu/3_combined/wu_sobj.rds"))
sobj
## An object of class Seurat
## 17727 features across 17762 samples within 1 assay
## Active assay: RNA (17727 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_38_tsne, RNA_pca_38_umap, harmony, harmony_38_umap, harmony_38_tsne
This is the projection name to visualize cells :
name2D = "harmony_38_tsne"
name2D_atlas = name2D
These are all the samples analyzed :
sample_info = readRDS(paste0(data_dir, "/5_wu/1_metadata/wu_sample_info.rds"))
# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
as.data.frame.table(., stringsAsFactors = FALSE) %>%
`colnames<-`(c("sample_identifier", "nb_cells")) %>%
dplyr::left_join(x = ., y = sample_info, by = "sample_identifier")
# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))
We define cluster type and cluster category :
sobj$cell_type = sobj$cell_type %>%
as.character() %>%
factor(., levels = names(color_markers))
cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(sobj$cell_type)[cluster_type])
sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
levels = levels(sobj$cell_type)) %>%
base::droplevels()
sobj$cluster_category = custom_order_cell_type[sobj$cluster_type, "cell_category"]
sobj$cluster_category = factor(sobj$cluster_category,
levels = names(category_color))
Project name :
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))
# Extract coordinates
cells_coord = sobj@reductions[[name2D_atlas]]@cell.embeddings %>%
as.data.frame() %>%
`colnames<-`(c("Dim1", "Dim2"))
cells_coord$project_name = sobj$project_name
cells_coord = cells_coord[(rnd_order), ]
# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = project_name)) +
ggplot2::geom_point(size = 0.5) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
ggplot2::theme_void() +
ggplot2::theme(aspect.ratio = 1,
legend.position = "none")
Cell type annotation :
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
group.by = "cell_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Cluster type annotation :
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
group.by = "cluster_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
Gene expression to assess annotation :
genes = c("PTPRC", "MSX2", "KRT14")
names(genes) = c("immune cells", "matrix cells", "non-matrix cells")
plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
gene = genes[[gene_id]]
pop = names(genes)[gene_id]
sobj$my_gene = Seurat::FetchData(sobj, gene)[, 1] %>%
aquarius::run_rescale(., new_min = 0, new_max = 10)
Seurat::FeaturePlot(sobj, features = "my_gene", reduction = name2D_atlas) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
breaks = seq(0, 10, by = 2.5),
labels = c("min", rep("", 3), "max")) +
ggplot2::labs(title = gene) +
# subtitle = pop) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, size = 17),
plot.subtitle = element_text(hjust = 0.5, size = 15),
legend.text = element_text(size = 15),
legend.position = "none") +
Seurat::NoAxes()
})
plot_list
## [[1]]
##
## [[2]]
##
## [[3]]
Dotplot :
custom_order_cell_type = custom_order_cell_type[levels(sobj$cluster_type), c("cell_type", "cell_category")]
plot_list = Seurat::DotPlot(sobj,
features = c("PTPRC",
"CD3E", "CD4",
"CD207", "AIF1",
# "PRDM1", "KRT85",
"MSX2",
"KRT32", "KRT35",
"KRT31", "PRR9",
"BAMBI", "ALDH1A3",
"KRT71", "KRT73",
"TOP2A", "MCM5",
"KRT14", "CXCL14",
"KRT15", "COL17A1",
"DIO2", "TCEAL2",
"KRT16", "KRT6C",
"SPINK5", "LY6D"),
group.by = "cluster_type", scale = TRUE,
scale.by = "radius", scale.min = NA, scale.max = NA) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(legend.position = "bottom",
legend.direction = "vertical",
# legend.justification = "bottom",
legend.box = "horizontal",
legend.box.margin = margin(0,25,0,0),
axis.title = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.line.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = 11, color = "black"),
plot.margin = unit(c(0,0.5,0,0), "cm"))
p = ggplot2::ggplot(custom_order_cell_type, aes(y = cell_type, x = 0)) +
ggplot2::geom_point(size = 0) +
ggplot2::geom_segment(aes(y = 0.5, yend = 2.5, x = 0, xend = 0), size = 6, col = category_color["immune cells"]) +
ggplot2::geom_segment(aes(y = 2.5, yend = 7.5, x = 0, xend = 0), size = 6, col = category_color["matrix"]) +
ggplot2::geom_segment(aes(y = 7.5, yend = 11.5, x = 0, xend = 0), size = 6, col = category_color["non matrix"]) +
ggplot2::scale_x_continuous(expand = c(0,0), limits = c(0,0)) +
ggplot2::theme_classic() +
ggplot2::theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title = element_blank(),
axis.line.x = element_blank(),
axis.text.y = element_text(size = 12, color = "black"),
plot.margin = unit(c(0.5,0,0,0.5), "cm"))
plot_list = patchwork::wrap_plots(p, plot_list,
nrow = 1, widths = c(1, 25))
plot_list
We load the IBL + ORS dataset :
sobj_iblors = readRDS(paste0(data_dir, "/5_wu/4_ibl_ors/iblmors_sobj.rds"))
sobj_iblors
## An object of class Seurat
## 15541 features across 8026 samples within 1 assay
## Active assay: RNA (15541 features, 2000 variable features)
## 6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne
This is the projection name to visualize cells :
name2D = "harmony_20_tsne"
To represent results from differential expression, we load the analyses results :
list_results = readRDS(paste0(data_dir, "/5_wu/4_ibl_ors/iblmors_list_results.rds"))
lapply(list_results, FUN = names)
## $IBL_vs_ORS
## [1] "mark" "enrichr_ibl" "enrichr_ors" "gsea"
We defined cluster type :
cluster_type = table(sobj_iblors$cell_type, sobj_iblors$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(sobj_iblors$cell_type)[cluster_type])
sobj_iblors$cluster_type = cluster_type[sobj_iblors$seurat_clusters]
sobj_iblors$cluster_type = factor(sobj_iblors$cluster_type,
levels = c("IBL", "ORS"))
IBL + ORS on the full atlas :
sobj$cell_bc = colnames(sobj)
sobj_iblors$cell_bc = colnames(sobj_iblors)
sobj$is_iblors = dplyr::left_join(sobj@meta.data[, c("cell_bc", "percent.mt")],
sobj_iblors@meta.data[, c("cell_bc", "cluster_type")],
by = "cell_bc")[, "cluster_type"]
Seurat::DimPlot(sobj, reduction = name2D_atlas, pt.size = 0.000001,
group.by = "is_iblors", order = FALSE) +
ggplot2::scale_color_manual(values = c(color_markers[c("IBL", "ORS")], bg_color),
breaks = c("IBL", "ORS", NA), na.value = bg_color) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_blank()) +
Seurat::NoAxes() + Seurat::NoLegend()
Cluster type :
Seurat::DimPlot(sobj_iblors, reduction = name2D, pt.size = 1,
group.by = "cluster_type", cols = color_markers) +
ggplot2::theme(aspect.ratio = 1) +
Seurat::NoAxes() +
Seurat::NoLegend()
DE genes between IBL and ORS :
mark = list_results$IBL_vs_ORS$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
# up-regulated in IBL
mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
# up-regulated in ORS
mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
# representative and selective for IBL
mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
# representative and selective for ORS
mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]
avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))
ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
ggplot2::geom_point() +
ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf, seed = 1,
aes(x = pct.1, y = pct.2, label = gene_name),
col = "black", fill = NA, size = 3.5, label.size = NA) +
ggplot2::labs(x = "Enriched in IBL",
y = "Enriched in ORS") +
ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
values = scales::rescale(unname(avg_logFC_range))) +
ggplot2::theme_classic() +
ggplot2::theme(aspect.ratio = 1)
GSEA plot :
the_gs_name = "REACTOME_KERATINIZATION"
the_content = list_results$IBL_vs_ORS$gsea@result %>%
dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
"\t|\tpvalue : ", round(the_content$pvalue, 4),
"\t|\tset size : ", the_content$setSize, " genes")
enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
geneSetID = the_gs_name) +
ggplot2::labs(title = the_gs_name,
subtitle = the_subtitle) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
margin = ggplot2::margin(3, 3, 5, 3)),
plot.subtitle = ggtext::element_markdown(hjust = 0.5,
size = 10))
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE"
the_content = list_results$IBL_vs_ORS$gsea@result %>%
dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
"\t|\tpvalue : ", round(the_content$pvalue, 4),
"\t|\tset size : ", the_content$setSize, " genes")
enrichplot::gseaplot2(x = list_results$IBL_vs_ORS$gsea,
geneSetID = the_gs_name) +
ggplot2::labs(title = the_gs_name,
subtitle = the_subtitle) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
margin = ggplot2::margin(3, 3, 5, 3)),
plot.subtitle = ggtext::element_markdown(hjust = 0.5,
size = 10))
Genes of interest :
genes = c("KRT16", "COL17A1", "DST", "KRT6B", "IL1R2", "WNT3",
"IFI27", "CXCL14", "IGFBP3", "KRT15", "CD200")
plot_list = lapply(c(1:length(genes)), FUN = function(gene_id) {
gene = genes[[gene_id]]
sobj_iblors$my_gene = Seurat::FetchData(sobj_iblors, gene)[, 1] %>%
aquarius::run_rescale(., new_min = 0, new_max = 10)
Seurat::FeaturePlot(sobj_iblors, features = "my_gene", reduction = name2D, pt.size = 0.25) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene,
breaks = seq(0, 10, by = 2.5),
labels = c("min", rep("", 3), "max")) +
ggplot2::labs(title = gene) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5, size = 17),
plot.subtitle = element_text(hjust = 0.5, size = 15),
legend.text = element_text(size = 15),
legend.position = "none") +
Seurat::NoAxes()
})
plot_list
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
In this section, we save files to associate with the manuscript, as supplementary tables.
We load the table :
package_version = read.table(paste0(".", "/data/info_to_install_2023_04_17.txt"),
header = TRUE)
head(package_version)
## order package_name version
## 1 1 acepack 1.4.1
## 2 2 ADGofTest 0.3
## 3 3 backports 1.2.1
## 4 4 base64enc 0.1-3
## 5 5 BH 1.72.0-3
## 6 6 bit 4.0.4
## url
## 1 https://cran.r-project.org/src/contrib/acepack_1.4.1.tar.gz
## 2 https://cran.r-project.org/src/contrib/ADGofTest_0.3.tar.gz
## 3 http://cran.r-project.org/src/contrib/Archive/backports/backports_1.2.1.tar.gz
## 4 https://cran.r-project.org/src/contrib/base64enc_0.1-3.tar.gz
## 5 http://cran.r-project.org/src/contrib/Archive/BH/BH_1.72.0-3.tar.gz
## 6 http://cran.r-project.org/src/contrib/Archive/bit/bit_4.0.4.tar.gz
## type
## 1 CRAN_last
## 2 CRAN_last
## 3 CRAN_archive
## 4 CRAN_last
## 5 CRAN_archive
## 6 CRAN_archive
Columns correspond to :
We save this table, except the last column (just for me) :
openxlsx::write.xlsx(package_version[, c(1:4)],
file = paste0(".", "/data/Supplementary Table 2.xlsx"))
We load the table :
cell_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_cell_markers.rds"))
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 13 13 9 10
## B cells cuticle cortex medulla
## 16 15 16 10
## IRS proliferative IBL ORS
## 16 20 15 16
## IFE HFSC melanocytes sebocytes
## 17 17 10 8
We save this table, except the last column (just for me) :
openxlsx::write.xlsx(cell_markers,
file = paste0(".", "/data/Supplementary Table 3.xlsx"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] org.Mm.eg.db_3.10.0 AnnotationDbi_1.48.0 IRanges_2.20.2
## [4] S4Vectors_0.24.4 Biobase_2.46.0 BiocGenerics_0.32.0
## [7] ComplexHeatmap_2.14.0 ggplot2_3.3.5 patchwork_1.1.2
## [10] dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 dyndimred_1.0.3
## [7] vctrs_0.3.8 usethis_2.0.1
## [9] dynwrap_1.2.1 blob_1.2.1
## [11] survival_3.2-13 prodlim_2019.11.13
## [13] dynutils_1.0.5 later_1.3.0
## [15] DBI_1.1.1 R.utils_2.11.0
## [17] SingleCellExperiment_1.8.0 rappdirs_0.3.3
## [19] uwot_0.1.8 dqrng_0.2.1
## [21] jpeg_0.1-8.1 zlibbioc_1.32.0
## [23] pspline_1.0-18 pcaMethods_1.78.0
## [25] mvtnorm_1.1-1 htmlwidgets_1.5.4
## [27] GlobalOptions_0.1.2 future_1.22.1
## [29] UpSetR_1.4.0 laeken_0.5.2
## [31] leiden_0.3.3 clustree_0.4.3
## [33] lmds_0.1.0 scater_1.14.6
## [35] irlba_2.3.3 markdown_1.1
## [37] DEoptimR_1.0-9 tidygraph_1.1.2
## [39] Rcpp_1.0.9 readr_2.0.2
## [41] KernSmooth_2.23-17 carrier_0.1.0
## [43] promises_1.1.0 gdata_2.18.0
## [45] DelayedArray_0.12.3 limma_3.42.2
## [47] graph_1.64.0 RcppParallel_5.1.4
## [49] Hmisc_4.4-0 fs_1.5.2
## [51] RSpectra_0.16-0 fastmatch_1.1-0
## [53] ranger_0.12.1 digest_0.6.25
## [55] png_0.1-7 sctransform_0.2.1
## [57] cowplot_1.0.0 DOSE_3.12.0
## [59] here_1.0.1 TInGa_0.0.0.9000
## [61] dynplot_1.1.0 ggraph_2.0.3
## [63] pkgconfig_2.0.3 GO.db_3.10.0
## [65] DelayedMatrixStats_1.8.0 gower_0.2.1
## [67] ggbeeswarm_0.6.0 iterators_1.0.12
## [69] DropletUtils_1.6.1 reticulate_1.26
## [71] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [73] circlize_0.4.15 beeswarm_0.4.0
## [75] GetoptLong_1.0.5 xfun_0.35
## [77] bslib_0.3.1 zoo_1.8-10
## [79] tidyselect_1.1.0 GA_3.2
## [81] reshape2_1.4.4 purrr_0.3.4
## [83] ica_1.0-2 pcaPP_1.9-73
## [85] viridisLite_0.3.0 rtracklayer_1.46.0
## [87] rlang_1.0.2 hexbin_1.28.1
## [89] jquerylib_0.1.4 dyneval_0.9.9
## [91] glue_1.4.2 waldo_0.3.1
## [93] RColorBrewer_1.1-2 matrixStats_0.56.0
## [95] stringr_1.4.0 lava_1.6.7
## [97] europepmc_0.3 DESeq2_1.26.0
## [99] recipes_0.1.17 labeling_0.3
## [101] httpuv_1.5.2 class_7.3-17
## [103] BiocNeighbors_1.4.2 DO.db_2.9
## [105] annotate_1.64.0 jsonlite_1.7.2
## [107] XVector_0.26.0 bit_4.0.4
## [109] mime_0.9 aquarius_0.1.5
## [111] Rsamtools_2.2.3 gridExtra_2.3
## [113] gplots_3.0.3 stringi_1.4.6
## [115] processx_3.5.2 gsl_2.1-6
## [117] bitops_1.0-6 cli_3.0.1
## [119] batchelor_1.2.4 RSQLite_2.2.0
## [121] randomForest_4.6-14 tidyr_1.1.4
## [123] data.table_1.14.2 rstudioapi_0.13
## [125] units_0.7-2 GenomicAlignments_1.22.1
## [127] nlme_3.1-147 qvalue_2.18.0
## [129] scran_1.14.6 locfit_1.5-9.4
## [131] scDblFinder_1.1.8 listenv_0.8.0
## [133] ggthemes_4.2.4 gridGraphics_0.5-0
## [135] R.oo_1.24.0 dbplyr_1.4.4
## [137] TTR_0.24.2 readxl_1.3.1
## [139] lifecycle_1.0.1 timeDate_3043.102
## [141] ggpattern_0.3.1 munsell_0.5.0
## [143] cellranger_1.1.0 R.methodsS3_1.8.1
## [145] proxyC_0.1.5 visNetwork_2.0.9
## [147] caTools_1.18.0 codetools_0.2-16
## [149] GenomeInfoDb_1.22.1 vipor_0.4.5
## [151] lmtest_0.9-38 msigdbr_7.5.1
## [153] htmlTable_1.13.3 triebeard_0.3.0
## [155] lsei_1.2-0 xtable_1.8-4
## [157] ROCR_1.0-7 classInt_0.4-3
## [159] BiocManager_1.30.10 scatterplot3d_0.3-41
## [161] abind_1.4-5 farver_2.0.3
## [163] parallelly_1.28.1 RANN_2.6.1
## [165] askpass_1.1 GenomicRanges_1.38.0
## [167] RcppAnnoy_0.0.16 tibble_3.1.5
## [169] ggdendro_0.1-20 cluster_2.1.0
## [171] future.apply_1.5.0 Seurat_3.1.5
## [173] dendextend_1.15.1 Matrix_1.3-2
## [175] ellipsis_0.3.2 prettyunits_1.1.1
## [177] lubridate_1.7.9 ggridges_0.5.2
## [179] igraph_1.2.5 RcppEigen_0.3.3.7.0
## [181] fgsea_1.12.0 remotes_2.4.2
## [183] scBFA_1.0.0 destiny_3.0.1
## [185] VIM_6.1.1 testthat_3.1.0
## [187] htmltools_0.5.2 BiocFileCache_1.10.2
## [189] yaml_2.2.1 utf8_1.1.4
## [191] plotly_4.9.2.1 XML_3.99-0.3
## [193] ModelMetrics_1.2.2.2 e1071_1.7-3
## [195] foreign_0.8-76 withr_2.5.0
## [197] fitdistrplus_1.0-14 BiocParallel_1.20.1
## [199] xgboost_1.4.1.1 bit64_4.0.5
## [201] foreach_1.5.0 robustbase_0.93-9
## [203] Biostrings_2.54.0 GOSemSim_2.13.1
## [205] rsvd_1.0.3 memoise_2.0.0
## [207] evaluate_0.18 forcats_0.5.0
## [209] rio_0.5.16 geneplotter_1.64.0
## [211] tzdb_0.1.2 caret_6.0-86
## [213] ps_1.6.0 DiagrammeR_1.0.6.1
## [215] curl_4.3 fdrtool_1.2.15
## [217] fansi_0.4.1 highr_0.8
## [219] urltools_1.7.3 xts_0.12.1
## [221] GSEABase_1.48.0 acepack_1.4.1
## [223] edgeR_3.28.1 checkmate_2.0.0
## [225] scds_1.2.0 cachem_1.0.6
## [227] npsurv_0.4-0 babelgene_22.3
## [229] rjson_0.2.20 openxlsx_4.1.5
## [231] ggrepel_0.9.1 clue_0.3-60
## [233] rprojroot_2.0.2 stabledist_0.7-1
## [235] tools_3.6.3 sass_0.4.0
## [237] nichenetr_1.1.1 magrittr_2.0.1
## [239] RCurl_1.98-1.2 proxy_0.4-24
## [241] car_3.0-11 ape_5.3
## [243] ggplotify_0.0.5 xml2_1.3.2
## [245] httr_1.4.2 assertthat_0.2.1
## [247] rmarkdown_2.18 boot_1.3-25
## [249] globals_0.14.0 R6_2.4.1
## [251] Rhdf5lib_1.8.0 nnet_7.3-14
## [253] RcppHNSW_0.2.0 progress_1.2.2
## [255] genefilter_1.68.0 statmod_1.4.34
## [257] gtools_3.8.2 shape_1.4.6
## [259] sf_1.0-3 HDF5Array_1.14.4
## [261] BiocSingular_1.2.2 rhdf5_2.30.1
## [263] splines_3.6.3 AUCell_1.8.0
## [265] carData_3.0-4 colorspace_1.4-1
## [267] generics_0.1.0 base64enc_0.1-3
## [269] dynfeature_1.0.0 smoother_1.1
## [271] gridtext_0.1.1 pillar_1.6.3
## [273] tweenr_1.0.1 sp_1.4-1
## [275] ggplot.multistats_1.0.0 rvcheck_0.1.8
## [277] GenomeInfoDbData_1.2.2 plyr_1.8.6
## [279] gtable_0.3.0 zip_2.2.0
## [281] knitr_1.41 latticeExtra_0.6-29
## [283] biomaRt_2.42.1 fastmap_1.1.0
## [285] ADGofTest_0.3 copula_1.0-0
## [287] doParallel_1.0.15 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] ipred_0.9-12 enrichplot_1.6.1
## [295] hms_1.1.1 ggforce_0.3.1
## [297] Rtsne_0.15 shiny_1.7.1
## [299] gridpattern_0.3.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 lazyeval_0.2.2
## [303] Formula_1.2-3 tsne_0.1-3
## [305] crayon_1.3.4 MASS_7.3-54
## [307] pROC_1.16.2 viridis_0.5.1
## [309] dynparam_1.0.0 rpart_4.1-15
## [311] zinbwave_1.8.0 compiler_3.6.3
## [313] ggtext_0.1.0